In [1]:
import os
import torch
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objects as go
In [2]:
# Switch the working directory to the project's code folder, presumably so the
# project modules imported in the next cell can be resolved.
# NOTE(review): hard-coded absolute path tied to one machine; a configurable
# base directory would make the notebook portable.
os.chdir('/Users/vincent/Desktop/Capstone Project/Code')
In [3]:
from synthetic import sin_single
from visualize import generate_metrics_table
from data import InRAMDataset, RigidDataLoader
from benchmarks import InverseVolatility, MaximumReturn, MinimumVariance, OneOverN, Random 
from losses import SharpeRatio, MaximumDrawdown, MeanReturns
from metrics import maximum_drawdown
from transaction import TransactionCost
In [4]:
# Switch the working directory to the data folder so the relative parquet path
# read in the next cell resolves.
# NOTE(review): hard-coded absolute path tied to one machine.
os.chdir('/Users/vincent/Desktop/Capstone Project/Data')
In [5]:
# Load the stored weight table and relabel its columns 0..19.
# NOTE(review): presumably these are the trained network's portfolio weights
# (they are plotted later under the name 'Network') — confirm the provenance.
# The literal 20 equals n_assets, which is only defined further down.
synthetic_weights = pd.read_parquet('synthetic_weights.prq')
synthetic_weights.columns = np.arange(20) 
In [6]:
# Seed the global torch and NumPy random generators so the synthetic data
# (and anything else drawing from them) is reproducible across runs.
torch.manual_seed(4)
np.random.seed(5)
In [7]:
# Dataset creation and loading
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# The example runs on synthetic data: every asset's return series is a sine
# wave whose frequency and phase are drawn at random. The constants below fix
# the size of that dataset and of the rolling windows cut from it.
n_timesteps = 1000
n_assets = 20
lookback = 36
gap = 0
horizon = 6
# Number of complete (lookback, gap, horizon) windows that fit in the series.
n_samples = 1 + n_timesteps - (lookback + gap + horizon)
In [8]:
# %%
# Roughly the first 80% of the samples are used for training, the rest for testing.
split_ix = int(0.8 * n_samples)
indices_train = list(range(0, split_ix))
# The test range starts lookback + horizon samples after the split point.
indices_test = list(range(split_ix + lookback + horizon, n_samples))

split_report = 'Train range: {}:{}\nTest range: {}:{}'.format(
    indices_train[0], indices_train[-1], indices_test[0], indices_test[-1])
print(split_report)
Train range: 0:766
Test range: 809:958
In [9]:
# Generate the synthetic asset returns, an array of shape :code:`(n_timesteps, n_assets)`.
asset_series = []
for asset_ix in range(n_assets):
    # The period is drawn before the phase; under the fixed seed the order of
    # the two draws determines the generated data.
    period = np.random.randint(3, lookback)
    shift = np.random.randint(0, lookback)
    asset_series.append(sin_single(n_timesteps,
                                   freq=1 / period,
                                   amplitude=0.05,
                                   phase=shift))
# One row per asset so far; transpose to put time on the first axis.
returns = np.array(asset_series).T
In [10]:
# %%
# Add zero-mean Gaussian noise (standard deviation 0.02) on top of the sine waves.
# NOTE: the update is in place, so re-running this cell alone adds noise a
# second time; re-run the generation cell above first.
returns += np.random.normal(scale=0.02, size=returns.shape)
In [11]:
# Build the feature array :code:`X` and the target array :code:`y` with a
# rolling window: each sample pairs the `lookback` rows before a cut point with
# the `horizon` rows starting `gap` rows after it.
window_ends = range(lookback, n_timesteps - horizon - gap + 1)
X_list = [returns[end - lookback: end, :] for end in window_ends]
y_list = [returns[end + gap: end + gap + horizon, :] for end in window_ends]

# Stack the windows along a new sample axis and insert a singleton axis at position 1.
X_stacked = np.stack(X_list, axis=0)
y_stacked = np.stack(y_list, axis=0)
X = X_stacked[:, None, ...]
y = y_stacked[:, None, ...]

print('X: {}, y: {}'.format(X.shape, y.shape))
X: (959, 1, 36, 20), y: (959, 1, 6, 20)
In [12]:
# Wrap the feature and target arrays in the project's in-memory dataset,
# labelling the assets 0..n_assets-1.
dataset = InRAMDataset(X, y, asset_names=np.arange(n_assets))
In [13]:
# Loader over the test indices only, configured with a batch size of 32.
dataloader_test = RigidDataLoader(dataset,
                                  indices=indices_test,
                                  batch_size=32)
In [14]:
# Baseline allocation strategies, keyed by the short label used in later
# tables and plots.
benchmark = dict(
    InvVol=InverseVolatility(),
    MaxRet=MaximumReturn(),
    MinVar=MinimumVariance(),
    EW=OneOverN(),
    Random=Random(),
)
In [15]:
# Evaluation metrics, keyed by the label they are reported under.
metrics = dict(
    MaxDD=MaximumDrawdown(),
    Sharpe=SharpeRatio(),
    MeanReturn=MeanReturns(),
)
In [16]:
# Row labels for the weight tables: every test index plus one extra label
# right after the last one.
index = np.append(indices_test, indices_test[-1] + 1)
In [17]:
# Start every benchmark's portfolio-vector memory (PVM) as an equal-weight
# table: one row per label in `index`, one column per asset.
equal_weight = 1 / n_assets
PVM_dict = {}
for b_name in benchmark:
    PVM_dict[b_name] = pd.DataFrame(equal_weight,
                                    index=index,
                                    columns=np.arange(n_assets))
In [18]:
# Evaluate every benchmark with every metric over all test samples via
# :code:`generate_metrics_table`.
# NOTE(review): PVM_dict is passed in and later cells read per-benchmark
# weights from it, so this call presumably fills it in place — confirm in
# the `visualize` module.
metrics_table = generate_metrics_table(benchmark,
                                       dataloader_test,
                                       metrics,
                                       PVM_dict)
In [19]:
# Targets of the test samples only (still 4-D, same layout as y).
y_test = y[indices_test]
In [20]:
# Keep only the first horizon step of the singleton axis, giving a 2-D array
# with one return per asset for every test sample.
# Sliced from `y` rather than from `y_test` itself so the cell is idempotent:
# re-running it no longer fails on an already 2-D array.
y_test = y[indices_test][:, 0, 0, :]
In [26]:
# Per-sample portfolio return of the loaded weights: weight times asset
# return, summed across assets.
# NOTE(review): y_test is a plain array, so the product is positional; this
# assumes synthetic_weights has exactly one row per test sample — confirm.
port_ret = (synthetic_weights * y_test).sum(axis=1)
In [27]:
# Line chart of the per-sample return of every benchmark and of the network.
fig = go.Figure()
for b_name, pvm in PVM_dict.items():
    # The first PVM row is dropped so the remaining rows match y_test in length.
    ret = (pvm.iloc[1:, :] * y_test).sum(axis=1)
    fig.add_trace(go.Scatter(x=ret.index, y=ret, name=b_name))
fig.add_trace(go.Scatter(x=port_ret.index, y=port_ret, name='Network'))
fig.update_layout(xaxis_title='timestamps', yaxis_title='return')

fig.show()
In [28]:
# Line chart of the compounded value of one unit invested in every benchmark
# and in the network.
fig = go.Figure()
for b_name, pvm in PVM_dict.items():
    # The first PVM row is dropped so the remaining rows match y_test in length.
    ret = (pvm.iloc[1:, :] * y_test).sum(axis=1)
    growth = np.cumprod(1 + ret)
    fig.add_trace(go.Scatter(x=ret.index, y=growth, name=b_name))
network_growth = np.cumprod(1 + port_ret)
fig.add_trace(go.Scatter(x=port_ret.index, y=network_growth, name='Network'))
fig.update_layout(xaxis_title='timestamps', yaxis_title='price')

fig.show()
In [29]:
# Sharpe ratio per strategy: mean return divided by its standard deviation
# (no annualisation, no risk-free rate).
Sharpe_Ratio = {}
for b_name, pvm in PVM_dict.items():
    ret = (pvm.iloc[1:, :] * y_test).sum(axis=1)
    Sharpe_Ratio[b_name] = ret.mean() / ret.std()
Sharpe_Ratio['network'] = port_ret.mean() / port_ret.std()
In [31]:
# Convert the label -> value mapping into a Series for plotting.
Sharpe_Ratio = pd.Series(Sharpe_Ratio)
In [33]:
# Bar chart of the Sharpe ratio, one bar per strategy.
sharpe_bars = go.Bar(x=Sharpe_Ratio.index, y=Sharpe_Ratio.values)
fig = go.Figure(data=[sharpe_bars])
fig.update_layout(xaxis_title='Strategy', yaxis_title='Sharpe Ratio')

fig.show()
In [35]:
# Sortino ratio per strategy: mean return divided by the downside deviation,
# i.e. the root-mean-square of the returns clipped at a zero target.
def _sortino_ratio(ret):
    """Return ``mean(ret) / sqrt(mean(min(ret, 0) ** 2))`` for a return series.

    The result is infinite (or NaN for a zero mean) when the series contains
    no negative return, because the downside deviation is then zero.
    """
    # Element-wise clip at zero; np.min(ret, 0) would instead reduce the
    # whole series to its single smallest value.
    downside = np.minimum(ret, 0)
    return ret.mean() / np.sqrt(np.mean(downside ** 2))


Sortino_Ratio = {}
for b_name in PVM_dict.keys():
    weights = PVM_dict[b_name].iloc[1:, :]
    ret = (weights * y_test).sum(axis=1)
    Sortino_Ratio[b_name] = _sortino_ratio(ret)
# The network's downside risk comes from its own returns, not from the last
# benchmark's series left over in the loop variable.
Sortino_Ratio['network'] = _sortino_ratio(port_ret)
In [36]:
# Convert the label -> value mapping into a Series for plotting.
Sortino_Ratio = pd.Series(Sortino_Ratio)
In [37]:
# Bar chart of the Sortino ratio, one bar per strategy.
sortino_bars = go.Bar(x=Sortino_Ratio.index, y=Sortino_Ratio.values)
fig = go.Figure(data=[sortino_bars])
fig.update_layout(xaxis_title='Strategy', yaxis_title='Sortino Ratio')

fig.show()
In [38]:
# Maximum drawdown of every strategy's return series, via the project's
# `maximum_drawdown` helper.
MDD = {}
for b_name, pvm in PVM_dict.items():
    ret = (pvm.iloc[1:, :] * y_test).sum(axis=1)
    MDD[b_name] = maximum_drawdown(ret)
MDD['network'] = maximum_drawdown(port_ret)
In [39]:
# Convert the label -> value mapping into a Series for plotting.
MDD = pd.Series(MDD)
In [40]:
# Bar chart of the maximum drawdown, one bar per strategy.
mdd_bars = go.Bar(x=MDD.index, y=MDD.values)
fig = go.Figure(data=[mdd_bars])
fig.update_layout(xaxis_title='Strategy', yaxis_title='Maximum Drawdown')

fig.show()
In [41]:
def compute_transaction_cost(weights, abs_ret, tc=None):
    """Average transaction cost of following a schedule of target weights.

    For every pair of consecutive rows, the earlier row's weights are drifted
    by that row's asset returns and renormalised to sum to one. The cost model
    is then called with the drifted weights and the next row's target weights,
    and ``1 - mu`` is recorded, where ``mu`` is the model's return value
    (presumably the fraction of wealth left after trading — confirm in the
    `transaction` module).

    Parameters
    ----------
    weights : pd.DataFrame
        Target weights, one row per rebalancing date, one column per asset.
    abs_ret : pd.DataFrame
        Asset returns with the same index and columns as `weights`.
    tc : callable, optional
        Cost model called as ``tc(prev_weights, new_weights)`` with two 1-D
        arrays. Defaults to the notebook-level ``TC`` object, which must then
        exist when the function is called.

    Returns
    -------
    float
        Mean of ``1 - mu`` over all rebalancing steps, or NaN when `weights`
        has fewer than two rows (no rebalancing takes place).
    """
    # Resolve the global lazily so passing `tc` removes the hidden dependency.
    cost_model = TC if tc is None else tc
    dates = weights.index
    costs = []
    for prev_date, date in zip(dates[:-1], dates[1:]):
        # Weights after the market move, before rebalancing.
        drifted = weights.loc[prev_date] * (1 + abs_ret.loc[prev_date])
        drifted = drifted / np.sum(drifted)
        mu = cost_model(drifted.values, weights.loc[date].values)
        costs.append(1 - mu)
    if not costs:
        # Avoid np.mean([]) and its RuntimeWarning; the value is NaN either way.
        return float('nan')
    return np.mean(costs)
In [44]:
# Average transaction cost of every strategy, with a 5e-4 proportional cost
# on both purchases and sales.
TC = TransactionCost(selling_cost=5e-4, purchasing_cost=5e-4)
transaction_cost = {}
for b_name, pvm in PVM_dict.items():
    weights = pvm.iloc[1:, :]
    # Give the raw return array the same labels as the weight table.
    benchmark_returns = pd.DataFrame(y_test,
                                     index=weights.index,
                                     columns=weights.columns)
    transaction_cost[b_name] = compute_transaction_cost(weights, benchmark_returns)
network_returns = pd.DataFrame(y_test,
                               index=synthetic_weights.index,
                               columns=synthetic_weights.columns)
transaction_cost['network'] = compute_transaction_cost(synthetic_weights, network_returns)
In [45]:
# Convert the label -> value mapping into a Series for plotting.
transaction_cost = pd.Series(transaction_cost)
In [46]:
# Bar chart of the average transaction cost, one bar per strategy.
cost_bars = go.Bar(x=transaction_cost.index, y=transaction_cost.values)
fig = go.Figure(data=[cost_bars])
fig.update_layout(xaxis_title='Strategy', yaxis_title='Transaction Cost')

fig.show()
In [48]:
# Stacked bar chart of the loaded weights over time, one trace per asset column.
weights = synthetic_weights.copy()
asset_bars = [go.Bar(x=weights.index, y=weights[col], name=col)
              for col in weights.columns]
fig = go.Figure(data=asset_bars)
fig.update_layout(xaxis_title='timestamps',
                  yaxis_title='weights',
                  barmode='stack',
                  bargap=0)
fig.show()
In [ ]: